Data cleaning
Uploading data
total <- read_csv("data/LTINT_GDP.csv")
df_gdp <- read_csv("data/GDP_yearly.csv")
df_ltint <- read_csv("data/LTINT_yearly.csv")
Making data wide I removed the Flag Codes to make the wide frames more manageable. Flags were not necessary for the analysis.
df_ltint_wide <- df_ltint %>%
select(-`Flag Codes`) %>%
pivot_wider(names_from = INDICATOR, values_from = Value)
Using MEASURE variable as the pivot to make it wide (names_from = MEASURE) because there were two different measures of GDP (million USD and per capita) Then dropping the INDICATOR variable that had GDP as values
df_gdp_wide <- df_gdp %>%
select(-`Flag Codes`) %>%
pivot_wider(names_from = MEASURE, values_from = Value, names_prefix = "GDP_") %>%
select(-INDICATOR)
Merging GDP and long term interest rates in one dataframe
df_merged <- df_gdp_wide %>% left_join(df_ltint_wide %>% select(LOCATION, TIME, LTINT), by = c("LOCATION","TIME"))
df_debt_ratio <- read_csv("data/debt_gdp_ratio_oecd.csv")
## Parsed with column specification:
## cols(
## LOCATION = col_character(),
## INDICATOR = col_character(),
## SUBJECT = col_character(),
## MEASURE = col_character(),
## FREQUENCY = col_character(),
## TIME = col_double(),
## Value = col_double(),
## `Flag Codes` = col_logical()
## )
df_debt_ratio <- df_debt_ratio %>% rename(debt_gdp = Value)
df_debt_wide <- df_debt_ratio %>%
select(-`Flag Codes`) %>%
pivot_wider(names_from = MEASURE, values_from = debt_gdp) %>%
select(-INDICATOR)
df_debt_wide
## # A tibble: 799 x 5
## LOCATION SUBJECT FREQUENCY TIME PC_GDP
## <chr> <chr> <chr> <dbl> <dbl>
## 1 AUS TOT A 1995 57.6
## 2 AUS TOT A 1996 55.4
## 3 AUS TOT A 1997 54.6
## 4 AUS TOT A 1998 52.4
## 5 AUS TOT A 1999 44.8
## 6 AUS TOT A 2000 41.1
## 7 AUS TOT A 2001 40.4
## 8 AUS TOT A 2002 38.7
## 9 AUS TOT A 2003 35.7
## 10 AUS TOT A 2004 32.3
## # … with 789 more rows
Merging dataframes (long term interest rates, debt-to-gdp ratio and gdp)
df_merged <- df_gdp_wide %>%
left_join(df_ltint_wide %>% select(LOCATION, TIME, LTINT), by = c("LOCATION","TIME")) %>%
left_join(df_debt_wide %>% select(LOCATION, TIME, PC_GDP), by = c("LOCATION","TIME")) %>%
rename(debt_gdp = PC_GDP)
df_merged
## # A tibble: 2,485 x 8
## LOCATION SUBJECT FREQUENCY TIME GDP_MLN_USD GDP_USD_CAP LTINT debt_gdp
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AUS TOT A 1960 25035. 2409. NA NA
## 2 AUS TOT A 1961 25326. 2380. NA NA
## 3 AUS TOT A 1962 27913. 2574. NA NA
## 4 AUS TOT A 1963 30386. 2748. NA NA
## 5 AUS TOT A 1964 32694. 2898. NA NA
## 6 AUS TOT A 1965 34490. 2998. NA NA
## 7 AUS TOT A 1966 38285. 3271. NA NA
## 8 AUS TOT A 1967 41004. 3442. NA NA
## 9 AUS TOT A 1968 46488. 3828. NA NA
## 10 AUS TOT A 1969 52132. 4202. NA NA
## # … with 2,475 more rows
Creating a natural log of GDP variable
df_merged <- df_merged %>%
mutate(ln_gdp = log(GDP_MLN_USD))
Creating dataset from 2006 (date where long term interest rates data starts)
df_merged_2006_2020 <- df_merged %>% filter(TIME>=2006)
df_merged_2006_2020 %>% write_csv("gdp_ltint_debt2006_2020.csv")
Comparing what countries are in each dataframe
countries_gdp <- df_gdp %>% select(LOCATION) %>% unique()
countries_debt <- df_debt_ratio %>% select(LOCATION) %>% unique()
countries_ltint <- df_ltint %>% select(LOCATION) %>% unique()
setdiff(countries_gdp, countries_debt)
## # A tibble: 29 x 1
## LOCATION
## <chr>
## 1 KOR
## 2 NZL
## 3 CHN
## 4 IND
## 5 IDN
## 6 RUS
## 7 ZAF
## 8 DEW
## 9 EU28
## 10 OECD
## # … with 19 more rows
setdiff(countries_debt, countries_ltint)
## # A tibble: 2 x 1
## LOCATION
## <chr>
## 1 TUR
## 2 EST
setdiff(countries_gdp, countries_ltint)
## # A tibble: 24 x 1
## LOCATION
## <chr>
## 1 TUR
## 2 CHN
## 3 EST
## 4 IDN
## 5 DEW
## 6 EU28
## 7 OECD
## 8 OECDE
## 9 BRA
## 10 SAU
## # … with 14 more rows
df_merged_2006_2020 %>% summary(debt_gdp)
## LOCATION SUBJECT FREQUENCY TIME
## Length:900 Length:900 Length:900 Min. :2006
## Class :character Class :character Class :character 1st Qu.:2009
## Mode :character Mode :character Mode :character Median :2013
## Mean :2013
## 3rd Qu.:2016
## Max. :2020
##
## GDP_MLN_USD GDP_USD_CAP LTINT debt_gdp
## Min. : 9439 Min. : 1551 Min. :-0.5238 Min. : 7.196
## 1st Qu.: 180282 1st Qu.: 20406 1st Qu.: 1.4919 1st Qu.: 45.547
## Median : 443502 Median : 33335 Median : 3.3596 Median : 67.829
## Mean : 3254583 Mean : 33862 Mean : 3.7163 Mean : 76.844
## 3rd Qu.: 2174240 3rd Qu.: 43884 3rd Qu.: 5.2444 3rd Qu.:103.304
## Max. :63079189 Max. :120670 Max. :22.4975 Max. :238.726
## NA's :14 NA's :332 NA's :427
## ln_gdp
## Min. : 9.153
## 1st Qu.:12.102
## Median :13.002
## Mean :13.251
## 3rd Qu.:14.592
## Max. :17.960
##
median_debt <- median(df_merged_2006_2020$debt_gdp, na.rm=T)
Creating a eurozone countries variable
eurozone <- c("AUT","BEL","CYP","EST","FIN","FRA","DEU","GRC","IRL","ITA","LVA","LTU","LUX","MLT","NLD","PRT","SVK","SVN","ESP")
Creating a eurozone & UKcountries variable
eurozone_uk <- c("AUT","BEL","CYP","EST","FIN","FRA","DEU","GRC","IRL","ITA","LVA","LTU","LUX","MLT","NLD","PRT","SVK","SVN","ESP","GBR")
Creating and above-below variable for median of debt-to-gdp in 2013 in eurozone
median_debt_euro_2013 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION %in% eurozone) %>%
pull(debt_gdp) %>% median(na.rm=T)
Creating and above-below variable for median of debt-to-gdp in 2019 in eurozone
median_debt_euro_2019 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
pull(debt_gdp) %>% median(na.rm=T)
Creating a comparators variable for those with similar debt-to-gdp ratios
comp_debt_gdp <- c("BEL","FRA","DEU","IRL","LUX","MLT","NLD","SVK","ESP")
Creating a spread to Germany variable
tab_deu <- df_merged_2006_2020 %>%
filter(LOCATION=="DEU") %>% ## creating a table just for germany
select(TIME,LTINT) %>%
rename(LTINT_DEU = LTINT)
df_merged_2006_2020 <- df_merged_2006_2020 %>%
group_by(LOCATION) %>%
left_join(tab_deu,by="TIME") %>%
ungroup() %>%
mutate(DEU_spread = LTINT-LTINT_DEU)
Data exploring
Debt-to-gdp ratio data for 2020 is very patchy so 2019 will be used as the most recent data point
Plot 1
Visualising the relationship between GDP and long term interest rates in 2019 Colour of the dot corresponds to whether the country is above or below the median debt-to-gdp ratio in 2019
median_debt_2019 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
pull(debt_gdp) %>% median(na.rm=T)
p1 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_2019,TRUE,FALSE)) %>%
ggplot(aes(LTINT, GDP_MLN_USD, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p1)
Visualising the relationship between GDP and long term interest rates in 2013
median_debt_2013 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
pull(debt_gdp) %>% median(na.rm=T)
Plot 2
Scatterplot - Colour of the dot corresponds to whether the country is above or below the median debt-to-gdp ratio in 2013
p2 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_2013,TRUE,FALSE)) %>%
ggplot(aes(LTINT, GDP_MLN_USD, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p2)
Plot 3
Visualising long term interest rates and GDP relationship, colour-coded by above or below median of eurozone debt-to-gdp ratio
p3 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_euro_2019,TRUE,FALSE)) %>%
ggplot(aes(LTINT, GDP_MLN_USD, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p3)
Plot 4
Visualising long term interest rates and debt-to-gdp ratio relationship in eurozone
p4 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
ggplot(aes(LTINT, debt_gdp, colour=LOCATION)) +
geom_point()
ggplotly(p4)
Plot 5
Visualising GDP and debt-to-gdp ratio relationship in eurozone to choose a good comparator (similar debt-to-gdp ratio but different sizes)
p5 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
ggplot(aes(GDP_MLN_USD, debt_gdp, colour=LOCATION)) +
geom_point()
ggplotly(p5)
Thoughts
- We will probably have to use 2019 as the most recent data point throughout because debt-to-gdp data for 2020 is patchy here.
- We can also do analysis for 2020 with the other variables.
- We can do everything just for the eurozone to eliminate noise and the effect of the US as an outlier but we might also choose to do EU (including non-eurozone) and UK to get a bit more info but also eliminate less noise
- Would be good to decide whether the UK always being there is necessary, I think it would probably be good.
- I used the median as the comparison point for debt-to-gdp ratio but we might want to choose something else.
- 2013 does not look that different from 2019 from what I can tell.
- To visualise I can’t think of much else beyond scatterplots, need to pick whether independent variable we care most about is debt-to-gdp ratio or GDP (or both). Could do two different plots, could do debt-to-gdpXlong term interest rates with GDP as size of the dots.
Comparators
- Option 1: Theory-driven comparators. SGC used New Zealand,
- Option 2: Countries that have similar debt-to-gdp ratios but very different GDPs, to see how they compare in terms of long term interest rates. Countries with similar debt-to-gdp ratios to Germany might make for good comparators with very different GDPs. Maybe the Netherlands, Finland, Ireland, Slovakia. See plot 5.
Plot 6
Same as plot 3 but with natural log of GDP Visualising long term interest rates and natural log of GDP relationship, colour-coded by above or below median of eurozone debt-to-gdp ratio
p6 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_euro_2019,TRUE,FALSE)) %>%
ggplot(aes(LTINT, ln_gdp, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p6)
Plot 7
Visualising relationship between debt-to-GDP ratio and long term interest rates with size of bubbles representing the natural log of GDP
p7 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
ggplot(aes(x = LTINT, y = debt_gdp, size = ln_gdp, colour=LOCATION)) +
geom_point(alpha=0.7)
ggplotly(p7)
Plot 8
Visualising relationship between natural log of GDP and long term interest rates only using countries with similar debt-to-GDP ratios
p8 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% comp_debt_gdp) %>%
ggplot(aes(x = LTINT, y = ln_gdp, colour=LOCATION)) +
geom_point(alpha=0.7)
ggplotly(p8)
Plot 9
2019: relationship between debt-to-GDP ratio and long term interest rates with size of bubbles representing GDP (not logged)
p9 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone_uk) %>%
ggplot(aes(x = debt_gdp, y = LTINT, size = GDP_MLN_USD, colour=LOCATION)) +
geom_point(alpha=0.7) +
xlim(0, 201)+ylim(-1, 11)
ggplotly(p9)
Plot 10
2013: Visualising relationship between debt-to-GDP ratio and long term interest rates with size of bubbles representing GDP (not logged)
p9 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION %in% eurozone_uk) %>%
ggplot(aes(x = debt_gdp, y = LTINT, size = GDP_MLN_USD, colour=LOCATION)) +
geom_point(alpha=0.7) +
xlim(0, 201)+ylim(-1, 11)
ggplotly(p9)
Plot 11
2007: Visualising relationship between debt-to-GDP ratio and long term interest rates with size of bubbles representing GDP (not logged)
p9 <- df_merged_2006_2020 %>%
filter(TIME==2007, LOCATION %in% eurozone_uk) %>%
ggplot(aes(x = debt_gdp, y = LTINT, size = GDP_MLN_USD, colour=LOCATION)) +
geom_point(alpha=0.7) +
xlim(0, 201)+ylim(-1, 11)
ggplotly(p9)
Table 1
tab_spreads_deu <- df_merged_2006_2020 %>%
filter(TIME%in%c(2007,2013,2019)) %>%
select(LOCATION,TIME,DEU_spread,LTINT)